#include <xen/event.h>
#include <xen/trace.h>
-
/********
To use these shadow page tables, guests must not rely on the ACCESSED
and DIRTY bits on L2 ptes being accurate -- they will typically all be set.
********/
-
-static inline void free_shadow_page(struct mm_struct *m,
- struct pfn_info *page)
+static inline void free_shadow_page(
+ struct mm_struct *m, struct pfn_info *page)
{
- unsigned long type = page->u.inuse.type_info & PGT_type_mask;
-
m->shadow_page_count--;
- if (type == PGT_l1_page_table)
+ switch ( page->u.inuse.type_info & PGT_type_mask )
+ {
+ case PGT_l1_page_table:
perfc_decr(shadow_l1_pages);
- else if (type == PGT_l2_page_table)
+ break;
+
+ case PGT_l2_page_table:
perfc_decr(shadow_l2_pages);
- else printk("Free shadow weird page type pfn=%08x type=%08x\n",
- frame_table-page, page->u.inuse.type_info);
-
+ break;
+
+ default:
+ printk("Free shadow weird page type pfn=%08x type=%08x\n",
+ frame_table-page, page->u.inuse.type_info);
+ break;
+ }
+
free_domheap_page(page);
}
-static void __free_shadow_table( struct mm_struct *m )
+static void __free_shadow_table(struct mm_struct *m)
{
- int j, free=0;
- struct shadow_status *a,*next;
+ int i, free = 0;
+ struct shadow_status *x, *n;
- // the code assumes you're not using the page tables i.e.
- // the domain is stopped and cr3 is something else!!
-
- // walk the hash table and call free_shadow_page on all pages
+ /*
+ * WARNING! The shadow page table must not currently be in use!
+ * e.g., You are expected to have paused the domain and synchronized CR3.
+ */
- shadow_audit(m,1);
+ shadow_audit(m, 1);
- for(j=0;j<shadow_ht_buckets;j++)
+ /* Free each hash chain in turn. */
+ for ( i = 0; i < shadow_ht_buckets; i++ )
{
- a = &m->shadow_ht[j];
- if (a->pfn)
- {
- free_shadow_page( m,
- &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
- a->pfn = 0;
- a->spfn_and_flags = 0;
- free++;
- }
- next=a->next;
- a->next=NULL;
- a=next;
- while(a)
+ /* Skip empty buckets. */
+ x = &m->shadow_ht[i];
+ if ( x->pfn == 0 )
+ continue;
+
+ /* Free the head page. */
+ free_shadow_page(
+ m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);
+
+ /* Reinitialise the head node. */
+ x->pfn = 0;
+ x->spfn_and_flags = 0;
+ n = x->next;
+ x->next = NULL;
+
+ free++;
+
+ /* Iterate over non-head nodes. */
+ for ( x = n; x != NULL; x = n )
{
- struct shadow_status *next = a->next;
+ /* Free the shadow page. */
+ free_shadow_page(
+ m, &frame_table[x->spfn_and_flags & PSH_pfn_mask]);
+
+ /* Re-initialise the chain node. */
+ x->pfn = 0;
+ x->spfn_and_flags = 0;
+
+ /* Add to the free list. */
+ n = x->next;
+ x->next = m->shadow_ht_free;
+ m->shadow_ht_free = x;
- free_shadow_page( m,
- &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
- a->pfn = 0;
- a->spfn_and_flags = 0;
free++;
- a->next = m->shadow_ht_free;
- m->shadow_ht_free = a;
- a=next;
}
- shadow_audit(m,0);
- }
- SH_LOG("Free shadow table. Freed= %d",free);
-}
+ shadow_audit(m, 0);
+ }
-#define TABLE_OP_ZERO_L2 1
-#define TABLE_OP_ZERO_L1 2
-#define TABLE_OP_FREE_L1 3
+ SH_LOG("Free shadow table. Freed=%d.", free);
+}
-static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
- unsigned int gpfn,
- struct pfn_info *spfn_info, int *work )
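+/*
+ * Zero or free a single shadow page, according to its type. Returns nonzero
+ * if the hash chain was modified (i.e., an L1 shadow was freed), in which
+ * case the caller must restart its traversal of the chain.
+ */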
+static inline int __clear_shadow_page(
+ struct mm_struct *m, struct shadow_status *x)
{
- unsigned int spfn = spfn_info-frame_table;
- int restart = 0;
+ unsigned long *p;
+ int restart = 0;
+ struct pfn_info *spage = &frame_table[x->spfn_and_flags & PSH_pfn_mask];
- switch( op )
+ switch ( spage->u.inuse.type_info & PGT_type_mask )
{
- case TABLE_OP_ZERO_L2:
- {
- if ( (spfn_info->u.inuse.type_info & PGT_type_mask) ==
- PGT_l2_page_table )
- {
- unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
-#ifdef __i386__
- memset(spl1e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*spl1e));
-#endif
- unmap_domain_mem( spl1e );
- }
- }
- break;
-
- case TABLE_OP_ZERO_L1:
- {
- if ( (spfn_info->u.inuse.type_info & PGT_type_mask) ==
- PGT_l1_page_table )
- {
- unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
- memset( spl1e, 0, ENTRIES_PER_L1_PAGETABLE * sizeof(*spl1e) );
- unmap_domain_mem( spl1e );
- }
- }
- break;
-
- case TABLE_OP_FREE_L1:
- {
- if ( (spfn_info->u.inuse.type_info & PGT_type_mask) ==
- PGT_l1_page_table )
- {
- // lock is already held
- delete_shadow_status( m, gpfn );
- free_shadow_page( m, spfn_info );
- restart = 1; // we need to go to start of list again
- }
- }
-
- break;
-
- default:
- BUG();
+ /* We clear L2 pages by zeroing the guest entries. */
+ case PGT_l2_page_table:
+ p = map_domain_mem((spage - frame_table) << PAGE_SHIFT);
+ memset(p, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(*p));
+ unmap_domain_mem(p);
+ break;
+ /* We clear L1 pages by freeing them: no benefit from zeroing them. */
+ case PGT_l1_page_table:
+ delete_shadow_status(m, x->pfn);
+ free_shadow_page(m, spage);
+ restart = 1; /* We need to go to start of list again. */
+ break;
}
+
return restart;
}
-static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
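+/*
+ * Zero every shadow L2 and free every shadow L1 belonging to this domain.
+ * Freeing an L1 unlinks its node from the hash chain mid-walk, hence the
+ * 'retry' of the whole bucket.
+ */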
+static void __clear_shadow_state(struct mm_struct *m)
{
- int j, work=0;
- struct shadow_status *a, *next;
+ int i;
+ struct shadow_status *x;
- // the code assumes you're not using the page tables i.e.
- // the domain is stopped and cr3 is something else!!
+ shadow_audit(m, 1);
- // walk the hash table and call free_shadow_page on all pages
+ for ( i = 0; i < shadow_ht_buckets; i++ )
+ {
+ retry:
+ /* Skip empty buckets. */
+ x = &m->shadow_ht[i];
+ if ( x->pfn == 0 )
+ continue;
- shadow_audit(m,1);
+ if ( __clear_shadow_page(m, x) )
+ goto retry;
- for(j=0;j<shadow_ht_buckets;j++)
- {
- retry:
- a = &m->shadow_ht[j];
- next = a->next;
- if (a->pfn)
- {
- if ( shadow_page_op( m, op, a->pfn,
- &frame_table[a->spfn_and_flags & PSH_pfn_mask],
- &work ) )
- goto retry;
- }
- a=next;
- while(a)
- {
- next = a->next;
- if ( shadow_page_op( m, op, a->pfn,
- &frame_table[a->spfn_and_flags & PSH_pfn_mask],
- &work ) )
- goto retry;
- a=next;
- }
- shadow_audit(m,0);
+ for ( x = x->next; x != NULL; x = x->next )
+ if ( __clear_shadow_page(m, x) )
+ goto retry;
+
+ shadow_audit(m, 0);
}
- SH_VLOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
+
+ SH_VLOG("Scan shadow table. l1=%d l2=%d",
+ perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
}

void shadow_mode_init(void)
{
}
-int shadow_mode_enable( struct domain *p, unsigned int mode )
+int shadow_mode_enable(struct domain *p, unsigned int mode)
{
struct mm_struct *m = &p->mm;
- struct shadow_status **fptr;
- int i;
- // allocate hashtable
- m->shadow_ht = xmalloc(shadow_ht_buckets *
- sizeof(struct shadow_status));
- if( m->shadow_ht == NULL )
+ m->shadow_ht = xmalloc(
+ shadow_ht_buckets * sizeof(struct shadow_status));
+ if ( m->shadow_ht == NULL )
goto nomem;
-
memset(m->shadow_ht, 0, shadow_ht_buckets * sizeof(struct shadow_status));
- // allocate space for first lot of extra nodes
- m->shadow_ht_extras = xmalloc(sizeof(void*) +
- (shadow_ht_extra_size *
- sizeof(struct shadow_status)));
- if( m->shadow_ht_extras == NULL )
- goto nomem;
-
- memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)) );
-
- m->shadow_extras_count++;
-
- // add extras to free list
- fptr = &m->shadow_ht_free;
- for ( i=0; i<shadow_ht_extra_size; i++ )
- {
- *fptr = &m->shadow_ht_extras[i];
- fptr = &(m->shadow_ht_extras[i].next);
- }
- *fptr = NULL;
- *((struct shadow_status ** )
- &m->shadow_ht_extras[shadow_ht_extra_size]) = NULL;
-
if ( mode == SHM_logdirty )
{
- m->shadow_dirty_bitmap_size = (p->max_pages+63)&(~63);
+ m->shadow_dirty_bitmap_size = (p->max_pages + 63) & ~63;
m->shadow_dirty_bitmap =
- xmalloc( m->shadow_dirty_bitmap_size/8);
- if( m->shadow_dirty_bitmap == NULL )
+ xmalloc(m->shadow_dirty_bitmap_size/8);
+ if ( m->shadow_dirty_bitmap == NULL )
{
m->shadow_dirty_bitmap_size = 0;
- BUG();
goto nomem;
}
- memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8);
+ memset(m->shadow_dirty_bitmap, 0, m->shadow_dirty_bitmap_size/8);
}
m->shadow_mode = mode;
- // call shadow_mk_pagetable
- __shadow_mk_pagetable( m );
+ __shadow_mk_pagetable(m);
return 0;
-nomem:
- if( m->shadow_ht ) {
- xfree( m->shadow_ht ); m->shadow_ht = NULL; };
-
- if( m->shadow_ht_extras ) {
- xfree( m->shadow_ht_extras ); m->shadow_ht_extras = NULL; };
-
+ nomem:
+ if ( m->shadow_ht != NULL )
+ xfree(m->shadow_ht);
+ m->shadow_ht = NULL;
return -ENOMEM;
}
void __shadow_mode_disable(struct domain *d)
{
struct mm_struct *m = &d->mm;
- struct shadow_status *next;
+ struct shadow_status *x, *n;
__free_shadow_table(m);
m->shadow_mode = 0;
SH_VLOG("freed tables count=%d l1=%d l2=%d",
- m->shadow_page_count, perfc_value(shadow_l1_pages),
- perfc_value(shadow_l2_pages));
+ m->shadow_page_count, perfc_value(shadow_l1_pages),
+ perfc_value(shadow_l2_pages));
- next = m->shadow_ht_extras;
- while ( next )
+ n = m->shadow_ht_extras;
+ while ( (x = n) != NULL )
{
- struct shadow_status * this = next;
m->shadow_extras_count--;
- next = *((struct shadow_status **)(&next[shadow_ht_extra_size]));
- xfree(this);
+ n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
+ xfree(x);
}
+ m->shadow_ht_extras = NULL;
+ ASSERT(m->shadow_extras_count == 0);
SH_LOG("freed extras, now %d", m->shadow_extras_count);
- if ( m->shadow_dirty_bitmap )
+ if ( m->shadow_dirty_bitmap != NULL )
{
- xfree( m->shadow_dirty_bitmap );
+ xfree(m->shadow_dirty_bitmap);
m->shadow_dirty_bitmap = 0;
m->shadow_dirty_bitmap_size = 0;
}
- // free the hashtable itself
- xfree( m->shadow_ht );
-
- m->shadow_ht = NULL;
- m->shadow_ht_extras = NULL;
+ xfree(m->shadow_ht);
+ m->shadow_ht = NULL;
}
-static int shadow_mode_table_op(struct domain *d,
- dom0_shadow_control_t *sc)
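+/*
+ * Handle a FLUSH, CLEAN or PEEK operation from the tools. FLUSH discards
+ * all shadow state; CLEAN copies the dirty bitmap to the caller and then
+ * clears it; PEEK copies the bitmap without clearing it.
+ */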
+static int shadow_mode_table_op(
+ struct domain *d, dom0_shadow_control_t *sc)
{
- unsigned int op = sc->op;
+ unsigned int op = sc->op;
struct mm_struct *m = &d->mm;
- int rc = 0;
-
- // since Dom0 did the hypercall, we should be running with it's page
- // tables right now. Calling flush on yourself would be really
- // stupid.
+ int i, rc = 0;
ASSERT(spin_is_locked(&d->mm.shadow_lock));
- if ( m == &current->mm )
- {
- printk("Don't try and flush your own page tables!\n");
- return -EINVAL;
- }
-
- SH_VLOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);
+ SH_VLOG("shadow mode table op %08lx %08lx count %d",
+ pagetable_val(m->pagetable), pagetable_val(m->shadow_table),
+ m->shadow_page_count);
- shadow_audit(m,1);
+ shadow_audit(m, 1);
- switch(op)
+ switch ( op )
{
case DOM0_SHADOW_CONTROL_OP_FLUSH:
__free_shadow_table( m );
- d->mm.shadow_fault_count = 0;
- d->mm.shadow_dirty_count = 0;
- d->mm.shadow_dirty_net_count = 0;
- d->mm.shadow_dirty_block_count = 0;
+ d->mm.shadow_fault_count = 0;
+ d->mm.shadow_dirty_count = 0;
+ d->mm.shadow_dirty_net_count = 0;
+ d->mm.shadow_dirty_block_count = 0;
break;
- case DOM0_SHADOW_CONTROL_OP_CLEAN: // zero all-non hypervisor
- {
- __scan_shadow_table( m, TABLE_OP_ZERO_L2 );
- __scan_shadow_table( m, TABLE_OP_ZERO_L1 );
-
- goto send_bitmap;
- }
-
+ case DOM0_SHADOW_CONTROL_OP_CLEAN:
+ __clear_shadow_state(m);
+
+ sc->stats.fault_count = d->mm.shadow_fault_count;
+ sc->stats.dirty_count = d->mm.shadow_dirty_count;
+ sc->stats.dirty_net_count = d->mm.shadow_dirty_net_count;
+ sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
+
+ d->mm.shadow_fault_count = 0;
+ d->mm.shadow_dirty_count = 0;
+ d->mm.shadow_dirty_net_count = 0;
+ d->mm.shadow_dirty_block_count = 0;
+
+ if ( (d->max_pages > sc->pages) ||
+ (sc->dirty_bitmap == NULL) ||
+ (d->mm.shadow_dirty_bitmap == NULL) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ sc->pages = d->max_pages;
- case DOM0_SHADOW_CONTROL_OP_CLEAN2: // zero all L2, free L1s
- {
- int i,j,zero=1;
-
- __scan_shadow_table( m, TABLE_OP_ZERO_L2 );
- __scan_shadow_table( m, TABLE_OP_FREE_L1 );
-
- send_bitmap:
- sc->stats.fault_count = d->mm.shadow_fault_count;
- sc->stats.dirty_count = d->mm.shadow_dirty_count;
- sc->stats.dirty_net_count = d->mm.shadow_dirty_net_count;
- sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
-
- d->mm.shadow_fault_count = 0;
- d->mm.shadow_dirty_count = 0;
- d->mm.shadow_dirty_net_count = 0;
- d->mm.shadow_dirty_block_count = 0;
-
- sc->pages = d->max_pages;
-
- if( d->max_pages > sc->pages ||
- !sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
- {
- rc = -EINVAL;
- goto out;
- }
-
-
-#define chunk (8*1024) // do this in 1KB chunks for L1 cache
-
- for(i=0;i<d->max_pages;i+=chunk)
- {
- int bytes = (( ((d->max_pages-i) > (chunk))?
- (chunk):(d->max_pages-i) ) + 7) / 8;
-
- copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
- d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- bytes );
-
- for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
- {
- if( d->mm.shadow_dirty_bitmap[j] != 0 )
- zero = 0;
- }
-
- memset( d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- 0, bytes);
- }
-
-#if 0 /* This optimisation is dangerous for some uses of this function.
- disable for the moment */
- /* Might as well stop the domain as an optimization. */
- if ( zero )
- domain_pause_by_systemcontroller(d);
-#endif
+#define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
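+/* A full chunk covers 8192 pages' dirty bits, i.e. 8192/8 = 1024 bytes. */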
+ for ( i = 0; i < d->max_pages; i += chunk )
+ {
+ int bytes = ((((d->max_pages - i) > chunk) ?
+ chunk : (d->max_pages - i)) + 7) / 8;
+
+ copy_to_user(
+ sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ bytes);
+
+ memset(
+ d->mm.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ 0, bytes);
+ }
- break;
- }
+ break;
case DOM0_SHADOW_CONTROL_OP_PEEK:
- {
- int i;
-
- sc->stats.fault_count = d->mm.shadow_fault_count;
- sc->stats.dirty_count = d->mm.shadow_dirty_count;
- sc->stats.dirty_net_count = d->mm.shadow_dirty_net_count;
- sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
-
- if( d->max_pages > sc->pages ||
- !sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
- {
- rc = -EINVAL;
- goto out;
- }
-
- sc->pages = d->max_pages;
-
-#define chunk (8*1024) // do this in 1KB chunks for L1 cache
-
- for(i=0;i<d->max_pages;i+=chunk)
- {
- int bytes = (( ((d->max_pages-i) > (chunk))?
- (chunk):(d->max_pages-i) ) + 7) / 8;
-
- copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
- d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- bytes );
- }
-
- break;
- }
+ sc->stats.fault_count = d->mm.shadow_fault_count;
+ sc->stats.dirty_count = d->mm.shadow_dirty_count;
+ sc->stats.dirty_net_count = d->mm.shadow_dirty_net_count;
+ sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
+
+ if ( (d->max_pages > sc->pages) ||
+ (sc->dirty_bitmap == NULL) ||
+ (d->mm.shadow_dirty_bitmap == NULL) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ sc->pages = d->max_pages;
+ copy_to_user(
+ sc->dirty_bitmap, d->mm.shadow_dirty_bitmap, (d->max_pages+7)/8);
- default:
- BUG();
+ break;
+ default:
+ BUG();
}
-
-out:
-
+ out:
SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);
-
- shadow_audit(m,1);
-
- // call shadow_mk_pagetable
- __shadow_mk_pagetable( m );
-
+ shadow_audit(m, 1);
+ __shadow_mk_pagetable(m);
return rc;
}
int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
{
unsigned int cmd = sc->op;
- int rc = 0;
+ int rc = 0;
+
+ if ( unlikely(d == current) )
+ {
+ DPRINTK("Don't try to do a shadow op on yourself!\n");
+ return -EINVAL;
+ }
domain_pause(d);
synchronise_pagetables(~0UL);
shadow_lock(&d->mm);
- if ( cmd == DOM0_SHADOW_CONTROL_OP_OFF )
+ switch ( cmd )
{
+ case DOM0_SHADOW_CONTROL_OP_OFF:
shadow_mode_disable(d);
- }
- else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
- {
+ break;
+
+ case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
shadow_mode_disable(d);
rc = shadow_mode_enable(d, SHM_test);
- }
- else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY )
- {
+ break;
+
+ case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
shadow_mode_disable(d);
rc = shadow_mode_enable(d, SHM_logdirty);
- }
- else if ( shadow_mode(d) &&
- (cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH) &&
- (cmd <= DOM0_SHADOW_CONTROL_OP_CLEAN2) )
- {
- rc = shadow_mode_table_op(d, sc);
- }
- else
- {
- rc = -EINVAL;
+ break;
+
+ default:
+ if ( shadow_mode(d) &&
+ (cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH) &&
+ (cmd <= DOM0_SHADOW_CONTROL_OP_PEEK) )
+ rc = shadow_mode_table_op(d, sc);
+ else
+ rc = -EINVAL;
+ break;
}
shadow_unlock(&d->mm);

domain_unpause(d);

return rc;
}

static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
{
- struct pfn_info *page;
+ struct pfn_info *page = alloc_domheap_page(NULL);
+
m->shadow_page_count++;
- page = alloc_domheap_page(NULL);
-
- if( unlikely(page == NULL) )
- {
- printk("Couldn't alloc shadow page! count=%d\n",
- m->shadow_page_count);
- SH_VLOG("Shadow tables l1=%d l2=%d",
- perfc_value(shadow_l1_pages),
- perfc_value(shadow_l2_pages));
- BUG(); // FIXME: try a shadow flush to free up some memory
- }
-
- return page;
+
+ if ( unlikely(page == NULL) )
+ {
+ printk("Couldn't alloc shadow page! count=%d\n",
+ m->shadow_page_count);
+ SH_VLOG("Shadow tables l1=%d l2=%d",
+ perfc_value(shadow_l1_pages),
+ perfc_value(shadow_l2_pages));
+ BUG(); /* XXX FIXME: try a shadow flush to free up some memory. */
+ }
+
+ return page;
}
-void unshadow_table( unsigned long gpfn, unsigned int type )
+void unshadow_table(unsigned long gpfn, unsigned int type)
{
- unsigned long spfn;
- struct domain *d = frame_table[gpfn].u.inuse.domain;
+ unsigned long spfn;
+ struct domain *d = frame_table[gpfn].u.inuse.domain;
- SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
- type,
- gpfn );
+ SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn);
perfc_incrc(unshadow_table_count);
- // this function is the same for both l1 and l2 tables
-
- // even in the SMP guest case, there won't be a race here as
- // this CPU was the one that cmpxchg'ed the page to invalid
-
+ /*
+ * This function is the same for all p.t. pages. Even for multi-processor
+ * guests there won't be a race here as this CPU was the one that
+ * cmpxchg'ed the page to invalid.
+ */
spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask;
-
delete_shadow_status(&d->mm, gpfn);
-
- free_shadow_page(&d->mm, &frame_table[spfn] );
-
+ free_shadow_page(&d->mm, &frame_table[spfn]);
}
-
unsigned long shadow_l2_table(
- struct mm_struct *m, unsigned long gpfn )
+ struct mm_struct *m, unsigned long gpfn)
{
struct pfn_info *spfn_info;
- unsigned long spfn;
- l2_pgentry_t *spl2e, *gpl2e;
- int i;
+ unsigned long spfn;
+ l2_pgentry_t *spl2e;
- SH_VVLOG("shadow_l2_table( %08lx )",gpfn);
+ SH_VVLOG("shadow_l2_table( %08lx )", gpfn);
perfc_incrc(shadow_l2_table_count);
- // XXX in future, worry about racing in SMP guests
- // -- use cmpxchg with PSH_pending flag to show progress (and spin)
-
- spfn_info = alloc_shadow_page(m);
-
- ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
+ if ( (spfn_info = alloc_shadow_page(m)) == NULL )
+ BUG(); /* XXX Deal gracefully with failure. */
spfn_info->u.inuse.type_info = PGT_l2_page_table;
perfc_incr(shadow_l2_pages);
- spfn = (unsigned long) (spfn_info - frame_table);
+ spfn = spfn_info - frame_table;
- // mark pfn as being shadowed, update field to point at shadow
+ /* Mark pfn as being shadowed; update field to point at shadow. */
set_shadow_status(m, gpfn, spfn | PSH_shadowed);
- // we need to do this before the linear map is set up
- spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
+ spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
+
+ /*
+ * We could proactively fill in PDEs for pages that are already shadowed.
+ * However, we tried it and it didn't help performance. This is simpler.
+ */
+ memset(spl2e, 0, DOMAIN_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
#ifdef __i386__
- // get hypervisor and 2x linear PT mapings installed
+ /* Install hypervisor and 2x linear p.t. mapings. */
memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
&idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
+ mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
__PAGE_HYPERVISOR);
#endif
- // can't use the linear map as we may not be in the right PT
- gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
+ unmap_domain_mem(spl2e);
- // proactively create entries for pages that are already shadowed
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- {
- unsigned long spte = 0;
+ SH_VLOG("shadow_l2_table( %08lx -> %08lx)", gpfn, spfn);
+ return spfn;
+}
-#if 0 // Turns out this doesn't really help
- unsigned long gpte;
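+/*
+ * Called when a write to the shadow linear map misses: either the guest L1
+ * has never been shadowed, or its shadow is not yet linked into the current
+ * shadow L2. Creates and/or links the shadow L1 for the given address.
+ */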
+static void shadow_map_l1_into_current_l2(unsigned long va)
+{
+ struct mm_struct *m = &current->mm;
+ unsigned long *gpl1e, *spl1e, gpde, spde, gl1pfn, sl1pfn, sl1ss;
+ struct pfn_info *sl1pfn_info;
+ int i;
- gpte = l2_pgentry_val(gpl2e[i]);
+ gpde = l2_pgentry_val(linear_l2_table[va >> L2_PAGETABLE_SHIFT]);
- if (gpte & _PAGE_PRESENT)
- {
- unsigned long s_sh =
- __shadow_status(p, gpte>>PAGE_SHIFT);
+ gl1pfn = gpde >> PAGE_SHIFT;
- l2pde_general( m, &gpte, &spte, s_sh );
+ sl1ss = __shadow_status(m, gl1pfn);
+ if ( !(sl1ss & PSH_shadowed) )
+ {
+ /* This L1 is NOT already shadowed so we need to shadow it. */
+ SH_VVLOG("4a: l1 not shadowed ( %08lx )", sl1pfn);
- }
-#endif
+ sl1pfn_info = alloc_shadow_page(m);
+ sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;
+
+ sl1pfn = sl1pfn_info - frame_table;
- spl2e[i] = mk_l2_pgentry( spte );
+ perfc_incrc(shadow_l1_table_count);
+ perfc_incr(shadow_l1_pages);
- }
+ set_shadow_status(m, gl1pfn, PSH_shadowed | sl1pfn);
- // its arguable we should 'preemptively shadow' a few active L1 pages
- // to avoid taking a string of faults when 'jacking' a running domain
+ l2pde_general(m, &gpde, &spde, sl1pfn);
- unmap_domain_mem( gpl2e );
- unmap_domain_mem( spl2e );
+ linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+ shadow_linear_l2_table[va >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(spde);
- SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
+ gpl1e = (unsigned long *) &(linear_pg_table[
+ (va >> PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)]);
- return spfn;
-}
+ spl1e = (unsigned long *) &shadow_linear_pg_table[
+ (va >> PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1)];
+ for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+ l1pte_propagate_from_guest(m, &gpl1e[i], &spl1e[i]);
+ }
+ else
+ {
+ /* This L1 is shadowed already, but the L2 entry is missing. */
+ SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )", sl1pfn);
+
+ sl1pfn = sl1ss & PSH_pfn_mask;
+ l2pde_general(m, &gpde, &spde, sl1pfn);
-int shadow_fault( unsigned long va, long error_code )
+ linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+ shadow_linear_l2_table[va >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
+ }
+}
+
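+/*
+ * Returns 1 if the fault was fixed up and the faulting instruction should
+ * be retried; returns 0 if the fault must be propagated to the guest.
+ */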
+int shadow_fault(unsigned long va, long error_code)
{
unsigned long gpte, spte;
struct mm_struct *m = &current->mm;
SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
- check_pagetable( current, current->mm.pagetable, "pre-sf" );
+ check_pagetable(m, current->mm.pagetable, "pre-sf");
+
+ /*
+ * STEP 1. A fast-reject set of checks with no locking.
+ */
- if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
+ if ( unlikely(__get_user(gpte, (unsigned long *)
+ &linear_pg_table[va >> PAGE_SHIFT])) )
{
SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
- return 0; // propagate to guest
+ return 0;
}
- if ( ! (gpte & _PAGE_PRESENT) )
+ if ( !(gpte & _PAGE_PRESENT) )
{
SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
- return 0; // we're not going to be able to help
+ return 0;
}
- if ( (error_code & 2) && ! (gpte & _PAGE_RW) )
+ if ( (error_code & 2) && !(gpte & _PAGE_RW) )
{
- // write fault on RO page
+ /* Write fault on a read-only mapping. */
return 0;
}
- // take the lock and reread gpte
+ /*
+ * STEP 2. Take the shadow lock and re-check the guest PTE.
+ */
shadow_lock(m);
-
- if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
+
+ if ( unlikely(__get_user(gpte, (unsigned long *)
+ &linear_pg_table[va >> PAGE_SHIFT])) )
{
SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
shadow_unlock(m);
- return 0; // propagate to guest
+ return 0;
}
if ( unlikely(!(gpte & _PAGE_PRESENT)) )
{
SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
shadow_unlock(m);
- return 0; // we're not going to be able to help
+ return 0;
}
- if ( error_code & 2 )
- { // write fault
- if ( likely(gpte & _PAGE_RW) )
+ /* Write fault? */
+ if ( error_code & 2 )
+ {
+ if ( unlikely(!(gpte & _PAGE_RW)) )
{
- l1pte_write_fault( m, &gpte, &spte );
- }
- else
- { // write fault on RO page
- SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+ /* Write fault on a read-only mapping. */
+ SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%lx)", gpte);
shadow_unlock(m);
- return 0; // propagate to guest
- // not clear whether we should set accessed bit here...
+ return 0;
}
+
+ l1pte_write_fault(m, &gpte, &spte);
}
else
{
- l1pte_read_fault( m, &gpte, &spte );
+ l1pte_read_fault(m, &gpte, &spte);
}
- SH_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte );
-
- // write back updated gpte
- // XXX watch out for read-only L2 entries! (not used in Linux)
- if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
- domain_crash(); // fixme!
-
- if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
- {
- // failed:
- // the L1 may not be shadowed, or the L2 entry may be insufficient
-
- unsigned long gpde, spde, gl1pfn, sl1pfn, sl1ss;
-
- SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte );
-
- gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
-
- gl1pfn = gpde>>PAGE_SHIFT;
-
- sl1ss = __shadow_status(&current->mm, gl1pfn);
- if ( ! (sl1ss & PSH_shadowed) )
- {
- // this L1 is NOT already shadowed so we need to shadow it
- struct pfn_info *sl1pfn_info;
- unsigned long *gpl1e, *spl1e;
- int i;
- sl1pfn_info = alloc_shadow_page( &current->mm );
- sl1pfn_info->u.inuse.type_info = PGT_l1_page_table;
-
- sl1pfn = sl1pfn_info - frame_table;
-
- SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
- perfc_incrc(shadow_l1_table_count);
- perfc_incr(shadow_l1_pages);
+ /*
+ * STEP 3. Write the modified shadow PTE and guest PTE back to the tables.
+ */
- set_shadow_status(&current->mm, gl1pfn, PSH_shadowed | sl1pfn);
+ /* XXX Watch out for read-only L2 entries! (not used in Linux). */
+ if ( unlikely(__put_user(gpte, (unsigned long *)
+ &linear_pg_table[va >> PAGE_SHIFT])) )
+ domain_crash();
- l2pde_general( m, &gpde, &spde, sl1pfn );
-
- linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
- shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
-
- gpl1e = (unsigned long *) &(linear_pg_table[
- (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
-
- spl1e = (unsigned long *) &shadow_linear_pg_table[
- (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
-
-
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- {
- l1pte_no_fault( m, &gpl1e[i], &spl1e[i] );
- }
-
-
- }
- else
- {
- // this L1 was shadowed (by another PT) but we didn't have an L2
- // entry for it
-
- SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
-
- sl1pfn = sl1ss & PSH_pfn_mask;
- l2pde_general( m, &gpde, &spde, sl1pfn );
-
- linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
- shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
-
- }
-
- shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
- // (we need to do the above even if we've just made the shadow L1)
+ /*
+ * Update of shadow PTE can fail because the L1 p.t. is not shadowed,
+ * or because the shadow isn't linked into this shadow L2 p.t.
+ */
+ if ( unlikely(__put_user(spte, (unsigned long *)
+ &shadow_linear_pg_table[va >> PAGE_SHIFT])) )
+ {
+ SH_VVLOG("3: not shadowed/mapped gpte=%08lx spte=%08lx", gpte, spte);
+ shadow_map_l1_into_current_l2(va);
+ shadow_linear_pg_table[va >> PAGE_SHIFT] = mk_l1_pgentry(spte);
+ }
- } // end of fixup writing the shadow L1 directly failed
-
perfc_incrc(shadow_fixup_count);
-
- m->shadow_fault_count++;
-
- check_pagetable( current, current->mm.pagetable, "post-sf" );
+ m->shadow_fault_count++;
shadow_unlock(m);
- return 1; // let's try the faulting instruction again...
-
+ check_pagetable(m, current->mm.pagetable, "post-sf");
+ return 1;
}
-void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
- unsigned long *prev_spfn_ptr,
- l1_pgentry_t **prev_spl1e_ptr )
+void shadow_l1_normal_pt_update(
+ unsigned long pa, unsigned long gpte,
+ unsigned long *prev_spfn_ptr,
+ l1_pgentry_t **prev_spl1e_ptr)
{
- unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;
- l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
-
+ unsigned long spfn, spte, prev_spfn = *prev_spfn_ptr;
+ l1_pgentry_t *spl1e, *prev_spl1e = *prev_spl1e_ptr;
- SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n",
- pa,gpte,prev_spfn, prev_spl1e);
+ /* N.B. To get here, we know the l1 page *must* be shadowed. */
+ SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, "
+ "prev_spfn=%08lx, prev_spl1e=%p\n",
+ pa, gpte, prev_spfn, prev_spl1e);
- // to get here, we know the l1 page *must* be shadowed
-
- gpfn = pa >> PAGE_SHIFT;
- spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
+ spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;
if ( spfn == prev_spfn )
{
spl1e = prev_spl1e;
}
else
{
- if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
- spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+ if ( prev_spl1e != NULL )
+ unmap_domain_mem(prev_spl1e);
+ spl1e = (l1_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
*prev_spfn_ptr = spfn;
*prev_spl1e_ptr = spl1e;
}
- // XXX we assume only pagetables can be shadowed;
- // this will have to change to allow arbitrary CoW etc.
-
- l1pte_no_fault( &current->mm, &gpte, &spte );
-
-
- spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = mk_l1_pgentry( spte );
-
+ l1pte_propagate_from_guest(&current->mm, &gpte, &spte);
+ spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry(spte);
}
-void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
+void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte)
{
- unsigned long gpfn, spfn, spte;
- l2_pgentry_t * sp2le;
- unsigned long s_sh=0;
+ unsigned long spfn, spte;
+ l2_pgentry_t *spl2e;
+ unsigned long s_sh;
+ /* N.B. To get here, we know the l2 page *must* be shadowed. */
SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
- // to get here, we know the l2 page has a shadow
-
- gpfn = pa >> PAGE_SHIFT;
- spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
-
+ spfn = __shadow_status(&current->mm, pa >> PAGE_SHIFT) & PSH_pfn_mask;
- spte = 0;
+ s_sh = (gpte & _PAGE_PRESENT) ?
+ __shadow_status(&current->mm, gpte >> PAGE_SHIFT) : 0;
- if( gpte & _PAGE_PRESENT )
- s_sh = __shadow_status(¤t->mm, gpte >> PAGE_SHIFT);
-
- sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
- // no real need for a cache here
+ /* XXXX Should mark guest pte as DIRTY and ACCESSED too! */
+ l2pde_general(&current->mm, &gpte, &spte, s_sh);
+ spl2e = (l2_pgentry_t *)map_domain_mem(spfn << PAGE_SHIFT);
+ spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry(spte);
+ unmap_domain_mem(spl2e);
+}
- l2pde_general( &current->mm, &gpte, &spte, s_sh );
- // XXXX Should mark guest pte as DIRTY and ACCESSED too!!!!!
- sp2le[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t) ] =
- mk_l2_pgentry( spte );
-
- unmap_domain_mem( (void *) sp2le );
-}
+/************************************************************************/
+/************************************************************************/
+/************************************************************************/
#if SHADOW_DEBUG
static int sh_l2_present;
static int sh_l1_present;
char * sh_check_name;
-#define FAIL(_f, _a...) \
-{printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n", sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
+#define FAIL(_f, _a...) \
+ do { \
+ printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n", \
+ sh_check_name, level, i, ## _a , gpte, spte); \
+ BUG(); \
+ } while ( 0 )
-static int check_pte( struct mm_struct *m,
- unsigned long gpte, unsigned long spte, int level, int i )
+static int check_pte(
+ struct mm_struct *m, unsigned long gpte, unsigned long spte,
+ int level, int i)
{
unsigned long mask, gpfn, spfn;
- if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00)
- return 1; // always safe
+ if ( (spte == 0) || (spte == 0xdeadface) || (spte == 0x00000E00) )
+ return 1; /* always safe */
if ( !(spte & _PAGE_PRESENT) )
FAIL("Non zero not present spte");
- if( level == 2 ) sh_l2_present++;
- if( level == 1 ) sh_l1_present++;
+ if ( level == 2 ) sh_l2_present++;
+ if ( level == 1 ) sh_l1_present++;
if ( !(gpte & _PAGE_PRESENT) )
FAIL("Guest not present yet shadow is");
mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
- if ( (spte & mask) != (gpte & mask ) )
+ if ( (spte & mask) != (gpte & mask) )
FAIL("Corrupt?");
if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
FAIL("Dirty coherence");
if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
FAIL("RW coherence");
- if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
+ if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY)) )
FAIL("RW2 coherence");
- spfn = spte>>PAGE_SHIFT;
- gpfn = gpte>>PAGE_SHIFT;
+ spfn = spte >> PAGE_SHIFT;
+ gpfn = gpte >> PAGE_SHIFT;
if ( gpfn == spfn )
{
if ( level > 1 )
- FAIL("Linear map ???"); // XXX this will fail on BSD
-
- return 1;
+ FAIL("Linear map ???"); /* XXX this will fail on BSD */
}
else
{
if ( level < 2 )
FAIL("Shadow in L1 entry?");
- if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
- FAIL("spfn problem g.sf=%08lx",
- __shadow_status(p, gpfn) );
+ if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
+ FAIL("spfn problem g.sf=%08lx", __shadow_status(m, gpfn));
}
return 1;
}
-static int check_l1_table( struct mm_struct *m, unsigned long va,
- unsigned long g2, unsigned long s2 )
+static int check_l1_table(
+ struct mm_struct *m, unsigned long va,
+ unsigned long g2, unsigned long s2)
{
- int j;
+ int i;
unsigned long *gpl1e, *spl1e;
- //gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]);
- //spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]);
+ gpl1e = map_domain_mem(g2 << PAGE_SHIFT);
+ spl1e = map_domain_mem(s2 << PAGE_SHIFT);
- gpl1e = map_domain_mem( g2<<PAGE_SHIFT );
- spl1e = map_domain_mem( s2<<PAGE_SHIFT );
-
- for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
- {
- unsigned long gpte = gpl1e[j];
- unsigned long spte = spl1e[j];
-
- check_pte( p, gpte, spte, 1, j );
- }
+ for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+ check_pte(m, gpl1e[i], spl1e[i], 1, i);
- unmap_domain_mem( spl1e );
- unmap_domain_mem( gpl1e );
+ unmap_domain_mem(spl1e);
+ unmap_domain_mem(gpl1e);
return 1;
}
-#define FAILPT(_f, _a...) \
-{printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); BUG();}
+#define FAILPT(_f, _a...) \
+ do { \
+ printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); \
+ BUG(); \
+ } while ( 0 )
-int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s )
+int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s)
{
unsigned long gptbase = pagetable_val(pt);
unsigned long gpfn, spfn;
- int i;
+ int i;
l2_pgentry_t *gpl2e, *spl2e;
sh_check_name = s;
- SH_VVLOG("%s-PT Audit",s);
+ SH_VVLOG("%s-PT Audit", s);
sh_l2_present = sh_l1_present = 0;
- gpfn = gptbase >> PAGE_SHIFT;
+ gpfn = gptbase >> PAGE_SHIFT;
- if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
+ if ( !(__shadow_status(m, gpfn) & PSH_shadowed) )
{
printk("%s-PT %08lx not shadowed\n", s, gptbase);
-
- if( __shadow_status(p, gpfn) != 0 ) BUG();
-
+ if ( __shadow_status(m, gpfn) != 0 )
+ BUG();
return 0;
}
- spfn = __shadow_status(p, gpfn) & PSH_pfn_mask;
+ spfn = __shadow_status(m, gpfn) & PSH_pfn_mask;
- if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) )
+ if ( __shadow_status(m, gpfn) != (PSH_shadowed | spfn) )
FAILPT("ptbase shadow inconsistent1");
gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
- //ipl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
-
-
- if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- ((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE)
- * sizeof(l2_pgentry_t)) )
+ if ( memcmp(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ ((SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT) -
+ DOMAIN_ENTRIES_PER_L2_PAGETABLE) * sizeof(l2_pgentry_t)) )
{
printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
- for (i=DOMAIN_ENTRIES_PER_L2_PAGETABLE;
- i<(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT));
- i++ )
+ for ( i = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
+ i < (SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT);
+ i++ )
printk("+++ (%d) %08lx %08lx\n",i,
- l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]) );
+ l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]));
FAILPT("hypervisor entries inconsistent");
}

if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
FAILPT("hypervisor linear map inconsistent");
- if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
+ if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
+ L2_PAGETABLE_SHIFT]) !=
((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
- l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]),
- (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR
- );
+ l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >>
+ L2_PAGETABLE_SHIFT]),
+ (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) | __PAGE_HYPERVISOR))) )
+ ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
+ __PAGE_HYPERVISOR))) )
FAILPT("hypervisor per-domain map inconsistent");
- // check the whole L2
+ /* Check the whole L2. */
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- {
- unsigned long gpte = l2_pgentry_val(gpl2e[i]);
- unsigned long spte = l2_pgentry_val(spl2e[i]);
+ check_pte(m, l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]), 2, i);
- check_pte( p, gpte, spte, 2, i );
- }
-
-
- // go back and recurse
+ /* Go back and recurse. */
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
{
- unsigned long gpte = l2_pgentry_val(gpl2e[i]);
- unsigned long spte = l2_pgentry_val(spl2e[i]);
-
- if ( spte )
- check_l1_table( p,
- i<<L2_PAGETABLE_SHIFT,
- gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
-
+ if ( l2_pgentry_val(spl2e[i]) != 0 )
+ check_l1_table(
+ m, i << L2_PAGETABLE_SHIFT,
+ l2_pgentry_val(gpl2e[i]) >> PAGE_SHIFT,
+ l2_pgentry_val(spl2e[i]) >> PAGE_SHIFT);
}
- unmap_domain_mem( spl2e );
- unmap_domain_mem( gpl2e );
+ unmap_domain_mem(spl2e);
+ unmap_domain_mem(gpl2e);
SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
- sh_l2_present, sh_l1_present );
+ sh_l2_present, sh_l1_present);
return 1;
}
-
#endif
#include <xen/perfc.h>
#include <asm/processor.h>
-
/* Shadow PT flag bits in pfn_info */
#define PSH_shadowed (1<<31) /* page has a shadow. PFN points to shadow */
-#define PSH_pending (1<<29) /* page is in the process of being shadowed */
#define PSH_pfn_mask ((1<<21)-1)
/* Shadow PT operation mode : shadowmode variable in mm_struct */
#define SHM_test (1) /* just run domain on shadow PTs */
#define SHM_logdirty (2) /* log pages that are dirtied */
#define SHM_translate (3) /* lookup machine pages in translation table */
-//#define SHM_cow (4) /* copy on write all dirtied pages */
-
+#define SHM_cow (4) /* copy on write all dirtied pages */
#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
-#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
+#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START + \
+ (SH_LINEAR_PT_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT))))
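+/*
+ * The shadow L2 page appears at the usual recursive offset within the
+ * shadow linear area (virtual base plus base >> 10 on i386).
+ */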
#define shadow_mode(_d) ((_d)->mm.shadow_mode)
#define shadow_lock_init(_d) spin_lock_init(&(_d)->mm.shadow_lock)
extern void shadow_mode_init(void);
extern int shadow_mode_control(struct domain *p, dom0_shadow_control_t *sc);
extern int shadow_fault(unsigned long va, long error_code);
-extern void shadow_l1_normal_pt_update(unsigned long pa, unsigned long gpte,
- unsigned long *prev_spfn_ptr,
- l1_pgentry_t **prev_spl1e_ptr);
+extern void shadow_l1_normal_pt_update(
+ unsigned long pa, unsigned long gpte,
+ unsigned long *prev_spfn_ptr, l1_pgentry_t **prev_spl1e_ptr);
extern void shadow_l2_normal_pt_update(unsigned long pa, unsigned long gpte);
extern void unshadow_table(unsigned long gpfn, unsigned int type);
extern int shadow_mode_enable(struct domain *p, unsigned int mode);
extern void __shadow_mode_disable(struct domain *d);
static inline void shadow_mode_disable(struct domain *d)
{
if ( shadow_mode(d) )
__shadow_mode_disable(d);
}
extern unsigned long shadow_l2_table(
- struct mm_struct *m, unsigned long gpfn );
+ struct mm_struct *m, unsigned long gpfn);
-#define SHADOW_DEBUG 0
+#define SHADOW_DEBUG 0
#define SHADOW_HASH_DEBUG 0
-#define SHADOW_OPTIMISE 1
struct shadow_status {
- unsigned long pfn; // gpfn
- unsigned long spfn_and_flags; // spfn plus flags
- struct shadow_status *next; // use pull-to-front list.
+ unsigned long pfn; /* Guest pfn. */
+ unsigned long spfn_and_flags; /* Shadow pfn plus flags. */
+ struct shadow_status *next; /* Pull-to-front list. */
};
-#define shadow_ht_extra_size 128 /*128*/
-#define shadow_ht_buckets 256 /*256*/
+#define shadow_ht_extra_size 128
+#define shadow_ht_buckets 256
#ifdef VERBOSE
#define SH_LOG(_f, _a...) \
/************************************************************************/
-static inline int __mark_dirty( struct mm_struct *m, unsigned int mfn )
+static inline int __mark_dirty(struct mm_struct *m, unsigned int mfn)
{
- unsigned int pfn;
- int rc = 0;
+ unsigned long pfn;
+ int rc = 0;
ASSERT(spin_is_locked(&m->shadow_lock));
+ ASSERT(m->shadow_dirty_bitmap != NULL);
pfn = machine_to_phys_mapping[mfn];
- /* We use values with the top bit set to mark MFNs that aren't
- really part of the domain's psuedo-physical memory map e.g.
- the shared info frame. Nothing to do here...
- */
- if ( unlikely(pfn & 0x80000000U) ) return rc;
+ /*
+ * Values with the MSB set denote MFNs that aren't really part of the
+ * domain's pseudo-physical memory map (e.g., the shared info frame).
+ * Nothing to do here...
+ */
+ if ( unlikely(pfn & 0x80000000UL) )
+ return rc;
- ASSERT(m->shadow_dirty_bitmap);
- if( likely(pfn<m->shadow_dirty_bitmap_size) )
+ if ( likely(pfn < m->shadow_dirty_bitmap_size) )
{
- /* These updates occur with mm.shadow_lock held, so use
- (__) version of test_and_set */
- if ( __test_and_set_bit( pfn, m->shadow_dirty_bitmap ) == 0 )
+ /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
+ if ( !__test_and_set_bit(pfn, m->shadow_dirty_bitmap) )
{
- // if we set it
m->shadow_dirty_count++;
rc = 1;
}
}
- else
+#ifndef NDEBUG
+ else if ( mfn < max_page )
{
- if ( mfn < max_page )
- {
- SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
- mfn, pfn, m->shadow_dirty_bitmap_size, m );
- SH_LOG("dom=%p caf=%08x taf=%08x\n",
- frame_table[mfn].u.inuse.domain,
- frame_table[mfn].count_info,
- frame_table[mfn].u.inuse.type_info );
- {
- extern void show_trace(unsigned long *esp);
- unsigned long *esp;
- __asm__ __volatile__ ("movl %%esp,%0" : "=r" (esp) : );
- show_trace(esp);
- }
- }
+ SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
+ mfn, pfn, m->shadow_dirty_bitmap_size, m );
+ SH_LOG("dom=%p caf=%08x taf=%08x\n",
+ frame_table[mfn].u.inuse.domain,
+ frame_table[mfn].count_info,
+ frame_table[mfn].u.inuse.type_info );
+ {
+ extern void show_trace(unsigned long *esp);
+ unsigned long *esp;
+ __asm__ __volatile__ ("movl %%esp,%0" : "=r" (esp) : );
+ show_trace(esp);
+ }
}
+#endif
return rc;
}
-static inline int mark_dirty( struct mm_struct *m, unsigned int mfn )
+static inline int mark_dirty(struct mm_struct *m, unsigned int mfn)
{
int rc;
- //ASSERT(local_irq_is_enabled());
- //if(spin_is_locked(&m->shadow_lock)) printk("+");
shadow_lock(m);
- rc = __mark_dirty( m, mfn );
+ rc = __mark_dirty(m, mfn);
shadow_unlock(m);
return rc;
}

static inline void l1pte_write_fault(
struct mm_struct *m, unsigned long *gpte_p, unsigned long *spte_p)
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
+ ASSERT(gpte & _PAGE_RW);
+
+ gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+
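+ /*
+ * In both modes the write is allowed to proceed, so the shadow PTE is
+ * made writable; logdirty mode additionally logs the dirtied frame.
+ */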
switch ( m->shadow_mode )
{
case SHM_test:
- spte = gpte;
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+ spte = gpte | _PAGE_RW;
break;
case SHM_logdirty:
- spte = gpte;
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
- __mark_dirty( m, (gpte >> PAGE_SHIFT) );
+ spte = gpte | _PAGE_RW;
+ __mark_dirty(m, gpte >> PAGE_SHIFT);
break;
}
*gpte_p = gpte;
*spte_p = spte;
}

static inline void l1pte_read_fault(
struct mm_struct *m, unsigned long *gpte_p, unsigned long *spte_p)
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
+ gpte |= _PAGE_ACCESSED;
+
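+ /*
+ * On a read fault, write access is granted only if the guest PTE is
+ * already dirty (test mode); in logdirty mode the mapping is never made
+ * writable here, so the first write will fault and be logged.
+ */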
switch ( m->shadow_mode )
{
case SHM_test:
- spte = gpte;
- gpte |= _PAGE_ACCESSED;
- spte |= _PAGE_ACCESSED;
- if ( ! (gpte & _PAGE_DIRTY ) )
- spte &= ~ _PAGE_RW;
+ spte = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
break;
case SHM_logdirty:
- spte = gpte;
- gpte |= _PAGE_ACCESSED;
- spte |= _PAGE_ACCESSED;
- spte &= ~ _PAGE_RW;
+ spte = gpte & ~_PAGE_RW;
break;
}
*gpte_p = gpte;
*spte_p = spte;
}
-static inline void l1pte_no_fault(
+static inline void l1pte_propagate_from_guest(
struct mm_struct *m, unsigned long *gpte_p, unsigned long *spte_p)
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;

switch ( m->shadow_mode )
{
case SHM_test:
spte = 0;
if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
(_PAGE_PRESENT|_PAGE_ACCESSED) )
- {
- spte = gpte;
- if ( ! (gpte & _PAGE_DIRTY ) )
- spte &= ~ _PAGE_RW;
- }
+ spte = (gpte & _PAGE_DIRTY) ? gpte : (gpte & ~_PAGE_RW);
break;
case SHM_logdirty:
spte = 0;
if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
(_PAGE_PRESENT|_PAGE_ACCESSED) )
- {
- spte = gpte;
- spte &= ~ _PAGE_RW;
- }
-
+ spte = gpte & ~_PAGE_RW;
break;
}
*gpte_p = gpte;
*spte_p = spte;
}

static inline void l2pde_general(
- struct mm_struct *m,
+ struct mm_struct *m,
unsigned long *gpde_p,
unsigned long *spde_p,
unsigned long sl1pfn)
{
unsigned long gpde = *gpde_p;
unsigned long spde = *spde_p;

spde = 0;
- if ( sl1pfn )
+ if ( sl1pfn != 0 )
{
- spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) |
+ spde = (gpde & ~PAGE_MASK) | (sl1pfn << PAGE_SHIFT) |
_PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
- gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
+ gpde |= _PAGE_ACCESSED | _PAGE_DIRTY;
- if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK) ) )
- {
- // detect linear map, and keep pointing at guest
- SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
+ /* Detect linear p.t. mappings and write-protect them. */
+ if ( (frame_table[sl1pfn].u.inuse.type_info & PGT_type_mask) ==
+ PGT_l2_page_table )
spde = gpde & ~_PAGE_RW;
- }
}
*gpde_p = gpde;
*spde_p = spde;
}

#if SHADOW_HASH_DEBUG
static void shadow_audit(struct mm_struct *m, int print)
{
- int live=0, free=0, j=0, abs;
+ int live = 0, free = 0, j = 0, abs;
struct shadow_status *a;
- for( j = 0; j < shadow_ht_buckets; j++ )
+ for ( j = 0; j < shadow_ht_buckets; j++ )
{
a = &m->shadow_ht[j];
- if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);}
- ASSERT((a->pfn&0xf0000000)==0);
- ASSERT(a->pfn<0x00100000);
- a=a->next;
- while(a && live<9999)
+ if ( a->pfn ) { live++; ASSERT(a->spfn_and_flags & PSH_pfn_mask); }
+ ASSERT(a->pfn < 0x00100000UL);
+ a = a->next;
+ while ( a && (live < 9999) )
{
live++;
- if(a->pfn == 0 || a->spfn_and_flags == 0)
+ if ( (a->pfn == 0) || (a->spfn_and_flags == 0) )
{
printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
live, a->pfn, a->spfn_and_flags, a->next);
BUG();
}
- ASSERT(a->pfn);
- ASSERT((a->pfn&0xf0000000)==0);
- ASSERT(a->pfn<0x00100000);
- ASSERT(a->spfn_and_flags&PSH_pfn_mask);
- a=a->next;
+ ASSERT(a->pfn < 0x00100000UL);
+ ASSERT(a->spfn_and_flags & PSH_pfn_mask);
+ a = a->next;
}
- ASSERT(live<9999);
+ ASSERT(live < 9999);
}
- a = m->shadow_ht_free;
- while(a) { free++; a=a->next; }
+ for ( a = m->shadow_ht_free; a != NULL; a = a->next )
+ free++;
- if(print) printk("Xlive=%d free=%d\n",live,free);
+ if ( print )
+ printk("Xlive=%d free=%d\n", live, free);
- abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
- if( abs < -1 || abs > 1 )
+ abs = (perfc_value(shadow_l1_pages) + perfc_value(shadow_l2_pages)) - live;
+ if ( (abs < -1) || (abs > 1) )
{
printk("live=%d free=%d l1=%d l2=%d\n",live,free,
perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
BUG();
}
-
}
-
#else
-#define shadow_audit(p, print)
+#define shadow_audit(p, print) ((void)0)
#endif
-static inline struct shadow_status* hash_bucket( struct mm_struct *m,
- unsigned int gpfn )
+static inline struct shadow_status *hash_bucket(
+ struct mm_struct *m, unsigned int gpfn)
{
- return &(m->shadow_ht[gpfn % shadow_ht_buckets]);
+ return &m->shadow_ht[gpfn % shadow_ht_buckets];
}
-static inline unsigned long __shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+static inline unsigned long __shadow_status(
+ struct mm_struct *m, unsigned int gpfn)
{
- struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn );
+ struct shadow_status *p, *x, *head;
- b = B;
- ob = NULL;
+ x = head = hash_bucket(m, gpfn);
+ p = NULL;
- SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b );
- shadow_audit(m,0); // if in debug mode
+ SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, x);
+ shadow_audit(m, 0);
do
{
- if ( b->pfn == gpfn )
+ ASSERT(x->pfn || ((x == head) && (x->next == NULL)));
+
+ if ( x->pfn == gpfn )
{
- unsigned long t;
- struct shadow_status *x;
-
- // swap with head
- t=B->pfn; B->pfn=b->pfn; b->pfn=t;
- t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags;
- b->spfn_and_flags=t;
-
- if( ob )
- { // pull to front
- *ob=b->next;
- x=B->next;
- B->next=b;
- b->next=x;
+ /* Pull-to-front if 'x' isn't already the head item. */
+ if ( unlikely(x != head) )
+ {
+ /* Delete 'x' from list and reinsert immediately after head. */
+ p->next = x->next;
+ x->next = head->next;
+ head->next = x;
+
+ /* Swap 'x' contents with head contents. */
+ SWAP(head->pfn, x->pfn);
+ SWAP(head->spfn_and_flags, x->spfn_and_flags);
}
- return B->spfn_and_flags;
- }
-#if SHADOW_HASH_DEBUG
- else
- {
- if(b!=B)ASSERT(b->pfn);
+
+ return head->spfn_and_flags;
}
-#endif
- ob=&b->next;
- b=b->next;
+
+ p = x;
+ x = x->next;
}
- while (b);
+ while ( x != NULL );
return 0;
}
-/* we can make this locking more fine grained e.g. per shadow page if it
-ever becomes a problem, but since we need a spin lock on the hash table
-anyway its probably not worth being too clever. */
-
-static inline unsigned long get_shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+/*
+ * N.B. We can make this locking more fine grained (e.g., per shadow page) if
+ * it ever becomes a problem, but since we need a spin lock on the hash table
+ * anyway it's probably not worth being too clever.
+ */
+static inline unsigned long get_shadow_status(
+ struct mm_struct *m, unsigned int gpfn)
{
unsigned long res;
- /* If we get here, we know that this domain is running in shadow mode.
- We also know that some sort of update has happened to the underlying
- page table page: either a PTE has been updated, or the page has
- changed type. If we're in log dirty mode, we should set the approrpiate
- bit in the dirty bitmap.
- NB: the VA update path doesn't use this so needs to be handled
- independnetly.
- */
-
- //ASSERT(local_irq_is_enabled());
- //if(spin_is_locked(&m->shadow_lock)) printk("*");
+ ASSERT(m->shadow_mode);
+
+ /*
+ * If we get here we know that some sort of update has happened to the
+ * underlying page table page: either a PTE has been updated, or the page
+ * has changed type. If we're in log dirty mode, we should set the
+ * appropriate bit in the dirty bitmap.
+ * N.B. The VA update path doesn't use this and is handled independently.
+ */
+
shadow_lock(m);
- if( m->shadow_mode == SHM_logdirty )
+ if ( m->shadow_mode == SHM_logdirty )
__mark_dirty( m, gpfn );
- res = __shadow_status( m, gpfn );
- if (!res)
+ if ( !(res = __shadow_status(m, gpfn)) )
shadow_unlock(m);
+
return res;
}
-static inline void put_shadow_status( struct mm_struct *m )
+static inline void put_shadow_status(
+ struct mm_struct *m)
{
shadow_unlock(m);
}
-static inline void delete_shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+static inline void delete_shadow_status(
+ struct mm_struct *m, unsigned int gpfn)
{
- struct shadow_status *b, *B, **ob;
+ struct shadow_status *p, *x, *n, *head;
ASSERT(spin_is_locked(&m->shadow_lock));
+ ASSERT(gpfn != 0);
- B = b = hash_bucket( m, gpfn );
+ head = hash_bucket(m, gpfn);
- SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
- shadow_audit(m,0);
- ASSERT(gpfn);
+ SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b);
+ shadow_audit(m, 0);
- if( b->pfn == gpfn )
+ /* Match on head item? */
+ if ( head->pfn == gpfn )
{
- if (b->next)
+ if ( (n = head->next) != NULL )
{
- struct shadow_status *D=b->next;
- b->spfn_and_flags = b->next->spfn_and_flags;
- b->pfn = b->next->pfn;
-
- b->next = b->next->next;
- D->next = m->shadow_ht_free;
- D->pfn = 0;
- D->spfn_and_flags = 0;
- m->shadow_ht_free = D;
+ /* Overwrite head with contents of following node. */
+ head->pfn = n->pfn;
+ head->spfn_and_flags = n->spfn_and_flags;
+
+ /* Delete following node. */
+ head->next = n->next;
+
+ /* Add deleted node to the free list. */
+ n->pfn = 0;
+ n->spfn_and_flags = 0;
+ n->next = m->shadow_ht_free;
+ m->shadow_ht_free = n;
}
else
{
- b->pfn = 0;
- b->spfn_and_flags = 0;
+ /* This bucket is now empty. Initialise the head node. */
+ head->pfn = 0;
+ head->spfn_and_flags = 0;
}
-#if SHADOW_HASH_DEBUG
- if( __shadow_status(m,gpfn) ) BUG();
- shadow_audit(m,0);
-#endif
- return;
+ goto found;
}
- ob = &b->next;
- b=b->next;
+ p = head;
+ x = head->next;
do
{
- if ( b->pfn == gpfn )
+ if ( x->pfn == gpfn )
{
- b->pfn = 0;
- b->spfn_and_flags = 0;
+ /* Delete matching node. */
+ p->next = x->next;
- // b is in the list
- *ob=b->next;
- b->next = m->shadow_ht_free;
- m->shadow_ht_free = b;
+ /* Add deleted node to the free list. */
+ x->pfn = 0;
+ x->spfn_and_flags = 0;
+ x->next = m->shadow_ht_free;
+ m->shadow_ht_free = x;
-#if SHADOW_HASH_DEBUG
- if( __shadow_status(m,gpfn) ) BUG();
-#endif
- shadow_audit(m,0);
- return;
+ goto found;
}
- ob = &b->next;
- b=b->next;
+ p = x;
+ x = x->next;
}
- while (b);
+ while ( x != NULL );
- // if we got here, it wasn't in the list
+ /* If we got here, it wasn't in the list! */
BUG();
+
+ found:
+ shadow_audit(m, 0);
}
-static inline void set_shadow_status( struct mm_struct *m,
- unsigned int gpfn, unsigned long s )
+static inline void set_shadow_status(
+ struct mm_struct *m, unsigned int gpfn, unsigned long s)
{
- struct shadow_status *b, *B, *extra, **fptr;
+ struct shadow_status *x, *head, *extra;
int i;
ASSERT(spin_is_locked(&m->shadow_lock));
+ ASSERT(gpfn != 0);
+ ASSERT(s & PSH_shadowed);
- B = b = hash_bucket( m, gpfn );
+ x = head = hash_bucket(m, gpfn);
- ASSERT(gpfn);
- SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next );
+ SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next);
+ shadow_audit(m, 0);
- shadow_audit(m,0);
+ /*
+ * STEP 1. If page is already in the table, update it in place.
+ */
do
{
- if ( b->pfn == gpfn )
+ if ( x->pfn == gpfn )
{
- b->spfn_and_flags = s;
- shadow_audit(m,0);
- return;
+ x->spfn_and_flags = s;
+ goto done;
}
- b=b->next;
+ x = x->next;
}
- while (b);
-
- // if we got here, this is an insert rather than update
+ while ( x != NULL );
- ASSERT( s ); // deletes must have succeeded by here
+ /*
+ * STEP 2. The page must be inserted into the table.
+ */
- if ( B->pfn == 0 )
+ /* If the bucket is empty then insert the new page as the head item. */
+ if ( head->pfn == 0 )
{
- // we can use this head
- ASSERT( B->next == 0 );
- B->pfn = gpfn;
- B->spfn_and_flags = s;
- shadow_audit(m,0);
- return;
+ head->pfn = gpfn;
+ head->spfn_and_flags = s;
+ ASSERT(head->next == NULL);
+ goto done;
}
- if( unlikely(m->shadow_ht_free == NULL) )
+ /* We need to allocate a new node. Ensure the quicklist is non-empty. */
+ if ( unlikely(m->shadow_ht_free == NULL) )
{
- SH_LOG("allocate more shadow hashtable blocks");
+ SH_LOG("Allocate more shadow hashtable blocks.");
- // we need to allocate more space
- extra = xmalloc(sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)));
+ extra = xmalloc(
+ sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
- if( ! extra ) BUG(); // should be more graceful here....
+ /* XXX Should be more graceful here. */
+ if ( extra == NULL )
+ BUG();
- memset(extra, 0, sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)));
+ memset(extra, 0, sizeof(void *) + (shadow_ht_extra_size * sizeof(*x)));
+ /* Record the allocation block so it can be correctly freed later. */
m->shadow_extras_count++;
-
- // add extras to free list
- fptr = &m->shadow_ht_free;
- for ( i=0; i<shadow_ht_extra_size; i++ )
- {
- *fptr = &extra[i];
- fptr = &(extra[i].next);
- }
- *fptr = NULL;
-
- *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) =
+ *((struct shadow_status **)&extra[shadow_ht_extra_size]) =
m->shadow_ht_extras;
- m->shadow_ht_extras = extra;
+ m->shadow_ht_extras = &extra[0];
+
+ /* Thread a free chain through the newly-allocated nodes. */
+ for ( i = 0; i < (shadow_ht_extra_size - 1); i++ )
+ extra[i].next = &extra[i+1];
+ extra[i].next = NULL;
+ /* Add the new nodes to the free list. */
+ m->shadow_ht_free = &extra[0];
}
- // should really put this in B to go right to front
- b = m->shadow_ht_free;
- m->shadow_ht_free = b->next;
- b->spfn_and_flags = s;
- b->pfn = gpfn;
- b->next = B->next;
- B->next = b;
+ /* Allocate a new node from the quicklist. */
+ x = m->shadow_ht_free;
+ m->shadow_ht_free = x->next;
- shadow_audit(m,0);
+ /* Initialise the new node and insert directly after the head item. */
+ x->pfn = gpfn;
+ x->spfn_and_flags = s;
+ x->next = head->next;
+ head->next = x;
- return;
+ done:
+ shadow_audit(m, 0);
}
-static inline void __shadow_mk_pagetable( struct mm_struct *mm )
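+/*
+ * Ensure mm->shadow_table refers to the shadow of the current guest page
+ * table, creating a new shadow L2 on first use. Called with the shadow
+ * lock held.
+ */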
+static inline void __shadow_mk_pagetable(struct mm_struct *mm)
{
- unsigned long gpfn, spfn=0;
+ unsigned long gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
+ unsigned long spfn = __shadow_status(mm, gpfn) & PSH_pfn_mask;
- gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
+ if ( unlikely(spfn == 0) )
+ spfn = shadow_l2_table(mm, gpfn);
- if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) )
- {
- spfn = shadow_l2_table(mm, gpfn );
- }
- mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
+ mm->shadow_table = mk_pagetable(spfn << PAGE_SHIFT);
}
-static inline void shadow_mk_pagetable( struct mm_struct *mm )
+static inline void shadow_mk_pagetable(struct mm_struct *mm)
{
SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
pagetable_val(mm->pagetable), mm->shadow_mode );
if ( unlikely(mm->shadow_mode) )
{
- //ASSERT(local_irq_is_enabled());
shadow_lock(mm);
__shadow_mk_pagetable(mm);
shadow_unlock(mm);
SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
pagetable_val(mm->pagetable), mm->shadow_mode,
pagetable_val(mm->shadow_table) );
-
}
-
#if SHADOW_DEBUG
extern int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s);
#else
#define check_pagetable(m, pt, s) ((void)0)
#endif
-
#endif /* XEN_SHADOW_H */
-
-